/*******************************************************************************

This code cleans a dataset which intersected the 2002 and 2019 zoning maps.

The file raw/nyc0219.dta was originally produced in QGIS and saved as a DTA.

The result of this code is a file, clean/nyc0219.dta, which is merged into
the main PLUTO dataset in pluto.do.

*******************************************************************************/

*** Manage settings

	* Project-wide settings; the $data global used below is expected to be set there
	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"

*** Load data

	* Raw intersection of the two lot layers, exported from QGIS
	use "$data/raw/nyc0219.dta", clear

*** Cleaning procedures

* Rename misnamed variables (from QGIS)
*
* Convention used throughout this file: variables carrying the _2 suffix belong
* to the second layer of the intersection, unsuffixed variables to the first.
* The fields below came out of QGIS with names that do not follow that
* convention, so they are corrected one by one.

	* Fields that should carry the _2 suffix but were exported without it
	rename zonedist3 zonedist3_2
	rename zonedist4 zonedist4_2
	rename spdist3 spdist3_2

	* Total assessed value: free up the unsuffixed name first, then move the
	* assesstotl field into it (order of these two lines matters)
	rename assesstot assesstot_2
	rename assesstotl assesstot

	* Building floor area on the _2 side is stored under a different name
	rename bldgarea floorarea_2

* Recode BBL
*
* Builds the numeric identifiers bbl and bbl_2 by concatenating a 1-digit
* borough code, the block left-padded to 5 characters and the lot left-padded
* to 4 characters, then converting the result back to a number.

	* Borough: translate the two-letter abbreviation into its numeric code.
	* Values outside the five listed abbreviations are left missing.

	local boro_abbrevs MN BX BK QN SI

	foreach src of varlist borough borough_2 {

		* borough -> boro, borough_2 -> boro_2
		local dst : subinstr local src "borough" "boro"

		gen `dst' = .

		local code = 0
		foreach abbr of local boro_abbrevs {
			local ++code
			replace `dst' = `code' if `src' == "`abbr'"
		}

	}

	* Codes are needed as strings for the concatenation below
	tostring boro boro_2, replace

	* Replace the original abbreviation variables with the recoded ones
	drop borough borough_2
	rename (boro boro_2) (borough borough_2)

	* Block/Lot: convert to string and left-pad with zeros

	tostring block block_2 lot lot_2, replace

	foreach b of varlist block block_2 {

		* Up to four zeros are prepended, one per pass, while shorter than 5
		forvalues pass = 1/4 {
			replace `b' = "0" + `b' if length(`b') < 5
		}

	}

	foreach l of varlist lot lot_2 {

		* Up to three zeros are prepended, one per pass, while shorter than 4
		forvalues pass = 1/3 {
			replace `l' = "0" + `l' if length(`l') < 4
		}

	}

	* Create BBL (string concatenation, then back to numeric)

	gen bbl = borough + block + lot
	gen bbl_2 = borough_2 + block_2 + lot_2

	destring bbl bbl_2 borough borough_2 block block_2 lot lot_2, replace

	* Show every digit of the identifier instead of scientific notation
	format bbl bbl_2 %18.0f

	* Cleanup: put the identifiers first

	order bbl bbl_2

	* Clean borough variable: attach readable names to the numeric codes
	label define borough_label 1 "Manhattan" 2 "Bronx" 3 "Brooklyn" 4 "Queens" 5 "Staten Island"
	label values borough borough_label

	* Remove leading/trailing spaces on addresses
	replace address = strtrim(address)
	
	* Process zoning districts
	*
	* Produces one zoning code per layer: zonedist (unsuffixed layer) and
	* zonedist_2 (_2 layer). For each layer the first listed district that
	* contains an "R" and is not a PARK designation is preferred; otherwise
	* the first listed district is kept and, where the conversion file has an
	* entry for it, replaced by the equivalent given there. Lots with a special
	* district are finally recoded to "Special District".
	*
	* NOTE: the third and fourth zoning district fields belong to the _2 layer
	* (they are renamed to zonedist3_2 / zonedist4_2 near the top of this file).
	* They must therefore only be used for zonedist_2. Referring to them as
	* zonedist3 / zonedist4 would either fail or, with variable abbreviation
	* enabled, silently resolve to the _2 variables and mix the two layers.
		
		* Unsuffixed layer: keep residential zoning code (two district fields)
		gen zonedist = zonedist1 if strpos(zonedist1,"R") > 0 & strpos(zonedist1,"PARK") == 0
		replace zonedist = zonedist2 if strpos(zonedist1,"R") == 0 & strpos(zonedist2,"R") > 0 & strpos(zonedist2,"PARK") == 0
		* Nothing residential found: fall back to the first listed district
		replace zonedist = zonedist1 if missing(zonedist)
		drop zonedist1 zonedist2
		
		* Codes written as "A/B": keep the part containing "R" when only the second part has one
		split zonedist, parse("/")
		replace zonedist = zonedist2 if strpos(zonedist1,"R") == 0 & strpos(zonedist2,"R") > 0
		drop zonedist1 zonedist2	
		
		* If residential zoning code not listed, convert using commercial zoning code equivalency
		* keep(1 3) keeps every row of the data in memory, matched or not
		merge m:1 zonedist using "$data/raw/zoning_dist_conversion.dta", nogen keep(1 3)
		replace zonedist = zonedist_to if !missing(zonedist_to)
		drop zonedist_to
		
		* _2 layer: keep residential zoning code (four district fields)
		gen zonedist_2 = zonedist1_2 if strpos(zonedist1_2,"R") > 0 & strpos(zonedist1_2,"PARK") == 0
		replace zonedist_2 = zonedist2_2 if strpos(zonedist1_2,"R") == 0 & strpos(zonedist2_2,"R") > 0 & strpos(zonedist2_2,"PARK") == 0
		replace zonedist_2 = zonedist3_2 if strpos(zonedist1_2,"R") == 0 & strpos(zonedist2_2,"R") == 0 & strpos(zonedist3_2,"R") > 0  & strpos(zonedist3_2,"PARK") == 0
		replace zonedist_2 = zonedist4_2 if strpos(zonedist1_2,"R") == 0 & strpos(zonedist2_2,"R") == 0 & strpos(zonedist3_2,"R") == 0 & strpos(zonedist4_2,"R") > 0 & strpos(zonedist4_2,"PARK") == 0
		* Nothing residential found: fall back to the first listed district
		replace zonedist_2 = zonedist1_2 if missing(zonedist_2)
		drop zonedist1_2 zonedist2_2 zonedist3_2 zonedist4_2
		
		* Codes written as "A/B": same treatment as above (split names its parts zonedist_21, zonedist_22)
		split zonedist_2, parse("/")
		replace zonedist_2 = zonedist_22 if strpos(zonedist_21,"R") == 0 & strpos(zonedist_22,"R") > 0
		drop zonedist_21 zonedist_22	
		
		* Temporarily rename zoning district variable (for merge below):
		* the conversion file is keyed on a variable named zonedist
		rename zonedist tmp_zonedist
		rename zonedist_2 zonedist
		
		* If residential zoning code not listed, convert using commercial zoning code equivalency
		merge m:1 zonedist using "$data/raw/zoning_dist_conversion.dta", nogen keep(1 3)
		replace zonedist = zonedist_to if !missing(zonedist_to)
		drop zonedist_to
		
		* Revert naming
		rename zonedist zonedist_2
		rename tmp_zonedist zonedist
		
		* Replace zoning if special district
		* A lot counts as being in a special district when its first special district field is filled in
		
		gen spdist = !missing(spdist1)
		gen spdist_2 = !missing(spdist1_2)
	
		replace zonedist = "Special District" if spdist == 1
		replace zonedist_2 = "Special District" if spdist_2 == 1
		
		* Removes the two indicators and every remaining special district field
		drop spdist*

* Rename variables for merge
*
* Unsuffixed variables are tagged with _2002; afterwards the _2 variables take
* over the unsuffixed names.

	* Variables to be tagged as 2002 values
	local vars_2002	bbl borough block lot address zonedist bldgclass ///
					floorarea resarea unitsres unitstotal assessland ///
					yearbuilt assesstot

	foreach old of varlist `vars_2002' {
		rename `old' `old'_2002
	}

	* Strip the _2 suffix from every remaining variable that carries it
	rename *_2 *
	
* Compute weighted 2002 land use data
*
* Each observation is one piece of the intersection between a 2002 lot
* (bbl_2002) and a 2019 lot (bbl); area_int is presumably the area of that
* piece (TODO confirm against the QGIS export). 2002 quantities are apportioned
* to 2019 lots in proportion to the share of the 2002 lot lying in each piece.
*
* NOTE(review): gegen is not a built-in command; it is presumably provided by
* the gtools package loaded through settings.do.
	
	* Share of the 2019 lot accounted for by this piece
	bys bbl: gegen area = sum(area_int)
	gen sh_area = area_int / area
	
	* Share of the 2002 lot accounted for by this piece (apportioning weight)
	bys bbl_2002: gegen area_2002 = sum(area_int)
	gen sh_area_2002 = area_int / area_2002
		
	* Largest piece within each 2019 lot
	bys bbl: gegen max_sh_area = max(sh_area)
	
	* Categorical 2002 attributes cannot be summed: keep them only on the
	* largest piece so that firstnm below picks that piece's value
	foreach v of varlist zonedist_2002 bldgclass_2002 yearbuilt_2002 {
		gen `v'_ = `v' if sh_area == max_sh_area
	}
	
* Collapse to 2019 BBLs
*
* iweights are used on purpose. With aweights, collapse rescales the weights
* within each by-group so that they sum to the number of observations, which
* turns (sum) into "weighted mean times N" instead of sum(weight * value); a
* 2002 lot split across several 2019 lots would then have its full totals
* assigned to every one of them. iweights are applied as given, so each 2019
* lot receives sum(sh_area_2002 * value) over its pieces.
	
	collapse 	(sum) floorarea_2002 resarea_2002 unitsres_2002 unitstotal_2002 assessland_2002 assesstot_2002 ///
				(firstnm) zonedist_2002_ bldgclass_2002_ yearbuilt_2002_ [iw=sh_area_2002], ///
				by(bbl)

	* Drop the trailing underscore from the helper variables
	rename *_ *

*** Save data

	save "$data/clean/nyc0219.dta", replace
